/******************************************************************************* * Copyright (c) 2002, 2013 Object Factory Inc. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Object Factory Inc. - Initial implementation *******************************************************************************/ package org.eclipse.ant.internal.ui.dtd.schema; import com.ibm.icu.text.MessageFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import org.eclipse.ant.internal.ui.dtd.ParseError; import org.eclipse.ant.internal.ui.dtd.util.SortedMap; import org.eclipse.ant.internal.ui.dtd.util.SortedMapFactory; /** * NfmParser parses an NFA and returns an equivalent DFA if it can do so in linear time and space (in terms of the original NFA). * <p> * * As used here, NfmParser is called lazily when someone actually asks for a Dfm. This is for performance reasons. Why go to the work of calculating * all those Dfms if nobody ever uses them? * * Well-formed errors in content models have already been detected. The only error that can arise in NfmParser is an ambiguity error. * * Bruggemann-Klein showed that if an NFA is 1-unambiguous, an epsilon-free NFA constructed from it in linear time is actually a DFA. This is obvious * in NfmParser. The algorithm works by removing all ambiguous transitions as the graph is constructed, then proving that the reduced graph is * equivalent to the original in time linear in the number of ambiguous transitions. * * An effort is made to keep the DFA small, but there is no optimization step, as DFAs are small anyway, with some linear inflation around *. In a * pathological case, like the classical (a*,a*,a*,..., a*) the number of transitions in the DFA can be quadratic but this algorithm will not blow up * exponentially. * * @author Bob Foster */ public class NfmParser { public Dfm parse(Nfm nfm) throws ParseError { // Parse nfm into dfm Dfm dfm = parseStart(nfm.getStart(), nfm.getStop()); // Make list of dfms in graph ArrayList<Dfm> dfms = new ArrayList<>(); collect(dfm, dfms); // Detect accept conflicts HashMap<Dfm, Dfm> duplicates = new HashMap<>(); detect(dfms, duplicates); // Replace duplicate dfms in graph replace(dfms, duplicates); // Allow nfm memory to be re-used Nfm.free(nfm); NfmNode.freeAll(); return dfm; } private void reportError(String name) throws ParseError { throw new ParseError(MessageFormat.format(AntDTDSchemaMessages.NfmParser_Ambiguous, new Object[] { name })); } public static void collect(Dfm dfm, List<Dfm> dfms) { dfms.add(dfm); collect1(dfm, dfms); } private static void collect1(Dfm dfm, List<Dfm> dfms) { Object[] follows = dfm.getValues(); if (follows != null) { for (int i = 0; i < follows.length; i++) { Dfm follow = (Dfm) follows[i]; if (!dfms.contains(follow)) { dfms.add(follow); collect1(follow, dfms); } } } } /** * Replace duplicate dfms found during conflict resolution. */ private void replace(ArrayList<Dfm> dfms, HashMap<Dfm, Dfm> removed) { for (Dfm dfm : dfms) { Object[] follows = dfm.getValues(); if (follows != null) { for (int j = 0; j < follows.length; j++) { Dfm replacement, follow = (Dfm) follows[j]; while ((replacement = removed.get(follow)) != null) follow = replacement; follows[j] = follow; } } } // release dfms so can be re-used for (Dfm dfm : removed.keySet()) { Dfm.free(dfm); } } /** * Detect conflicts in each state. Two transitions are a potential conflict if they accept the same string value. They are an actual conflict if * their follow dfms are not identical and they are an actual ambiguity if the transition atoms of the follow dfms are not pairwise identical. * This is derived from the rule of Bruggemann-Klein, which determines that (a|a)b is not ambiguous, but both (a,b)|(a,c) and (a,b)|(a,b) are. The * latter might be surprising, but that's committee work for you. If two transitions are not ambiguous, one can be removed without affecting the * language accepted, and thus we have converted our epsilon-free NFA into a DFA. If any two transitions are ambiguous, we report an error and our * responsibility ends. Note that no transitions can be removed until all have been checked; this might disguise the ambiguity in, e.g., * ((a|a),b,(a|a))*. */ private void detect(ArrayList<Dfm> dfms, HashMap<Dfm, Dfm> duplicates) throws ParseError { for (Iterator<Dfm> iter = dfms.iterator(); iter.hasNext();) { Dfm dfm = iter.next(); Object[] accepts = dfm.getKeys(); Object[] follows = dfm.getValues(); if (accepts != null) { String last = null; for (int i = 0, lasti = -1; i < accepts.length; i++) { String accept = accepts[i].toString(); // accepts strings are interned allowing identity comparison if (last != null && last == accept) { if (follows[i] != follows[lasti]) checkConflict(new Conflict(accept, (Dfm) follows[lasti], (Dfm) follows[i])); } else { last = accept; lasti = i; } } } } // once more for removal for (Iterator<Dfm> iter = dfms.iterator(); iter.hasNext();) { Dfm dfm = iter.next(); // record conflicts Object[] accepts = dfm.getKeys(); Object[] follows = dfm.getValues(); boolean remove = false; if (accepts != null) { boolean[] removes = new boolean[accepts.length]; String last = null; for (int i = 0, lasti = -1; i < accepts.length; i++) { String accept = accepts[i].toString(); if (last != null && last == accept) { remove = true; removes[i] = true; if (follows[i] != follows[lasti]) { Dfm dfmhi = (Dfm) follows[i]; Dfm dfmlo = (Dfm) follows[lasti]; if (dfmhi.id < dfmlo.id) { Dfm tmp = dfmhi; dfmhi = dfmlo; dfmlo = tmp; } Dfm dup = duplicates.get(dfmhi); if (dup == null || dfmlo.id < dup.id) { duplicates.put(dfmhi, dfmlo); } else { duplicates.put(dfmlo, dup); } } } else { last = accept; lasti = i; } } if (remove) { SortedMap map = dfm.getMap(); int i = 0; for (Iterator<?> iterator = map.keyIterator(); iterator.hasNext(); i++) { iterator.next(); if (removes[i]) iterator.remove(); } SortedMapFactory.freeMap(map); } } } } /** * Check conflict and report ambiguity. * * @param conflict * Potential ambiguity */ private void checkConflict(Conflict conflict) throws ParseError { if (conflict.dfm1.accepting != conflict.dfm2.accepting) { reportError(conflict.name); } Object[] accept1 = conflict.dfm1.getKeys(); Object[] accept2 = conflict.dfm2.getKeys(); if ((accept1 == null) != (accept2 == null)) { reportError(conflict.name); } if (accept1 != null) { if (accept1.length != accept2.length) { reportError(conflict.name); } for (int j = 0; j < accept2.length; j++) { if (accept1[j] != accept2[j]) { reportError(conflict.name); } } } } /** * Recursive parse that visits every node reachable from the start symbol. */ private Dfm parseStart(NfmNode start, NfmNode accept) { // mark the start node Dfm result = Dfm.dfm(false); start.dfm = result; // we can minimize alias dfms by marking all starting transfer links while (start.next1 != null && start.next2 == null && start.symbol == null) { start = start.next1; start.dfm = result; } Dfm parsed = parse(1, start, accept); result.merge(parsed); Dfm.free(parsed); return result; } private void parseNext(int mark, Dfm result, NfmNode start, NfmNode accept) { Dfm parsed = parse(mark + 1, start, accept); result.merge(parsed); Dfm.free(parsed); } /** * Recursive parse that visits every node reachable from the start symbol. */ private Dfm parse(int mark, NfmNode start, NfmNode accept) { // eliminate useless recursion (note that accept node has no branches) while (start.next1 != null && start.next2 == null && start.symbol == null) start = start.next1; // if we reached the accept node, return an empty dfm that accepts if (start == accept) return Dfm.dfm(true); // for a symbol, construct a dfm that accepts the symbol if (start.symbol != null) { Dfm nextdfm = null; NfmNode next = start.next1, snext = next; while (snext.dfm == null && snext.next1 != null && snext.next2 == null && snext.symbol == null) snext = snext.next1; if (snext.dfm != null) { for (NfmNode n = next; n != snext; n = n.next1) n.dfm = snext.dfm; nextdfm = snext.dfm; } else { nextdfm = Dfm.dfm(false); snext.dfm = nextdfm; for (NfmNode n = next; n != snext; n = n.next1) n.dfm = nextdfm; parseNext(mark, nextdfm, snext, accept); } Dfm dfm = Dfm.dfm(start.symbol, nextdfm); return dfm; } // otherwise, follow both branches and return the combined result Dfm dfm1 = null, dfm2 = null; int saveMark; if (start.next1 != null && start.next1.mark != mark) { saveMark = start.next1.mark; start.next1.mark = mark; dfm1 = parse(mark, start.next1, accept); start.next1.mark = saveMark; } if (start.next2 != null && start.next2.mark != mark) { saveMark = start.next2.mark; start.next2.mark = mark; dfm2 = parse(mark, start.next2, accept); start.next2.mark = saveMark; } if (dfm2 != null) { if (dfm1 != null) dfm1.merge(dfm2); else dfm1 = dfm2; } return dfm1; } private static class Conflict { public String name; public Dfm dfm1, dfm2; public Conflict(String name, Dfm dfm1, Dfm dfm2) { this.name = name; this.dfm1 = dfm1; this.dfm2 = dfm2; } @Override public int hashCode() { return dfm1.hashCode() + dfm2.hashCode(); } @Override public boolean equals(Object o) { if (o == this) return true; if (!(o instanceof Conflict)) return false; Conflict other = (Conflict) o; return (dfm1 == other.dfm1 && dfm2 == other.dfm2) || (dfm1 == other.dfm2 && dfm2 == other.dfm1); } } }